<?php

/* 
 * 一些简单的文本处理，包括正则匹配什么的放这块
 * 2016-03-21  邓继松
 */

/**
 * Extract the cell text of an HTML table into a two-dimensional array.
 *
 * Rows are delimited by closing </tr> tags and cells by closing </td> tags
 * (matched case-insensitively). Every other tag is stripped, so <th> cells
 * are not captured as separate cells. All ASCII spaces are removed from the
 * text, and a line break that is followed by whitespace is removed together
 * with that whitespace.
 *
 * Text after the last </tr>, and text after the last </td> inside a row, is
 * discarded. The literal strings "{tr}" and "{td}" are used internally as
 * split markers, so cell text containing them will be split as well.
 *
 * @param string $table HTML source of the table.
 * @return array List of rows, each row being a list of cell strings. An empty
 *               array is returned when the input contains no closed row.
 */
function get_td_array($table) {
    // Drop the opening <table>, <tr> and <td> tags, attributes included.
    // Array patterns are applied one after another, in the listed order.
    $table = preg_replace(
        ["'<table[^>]*?>'si", "'<tr[^>]*?>'si", "'<td[^>]*?>'si"],
        '',
        $table
    );

    // Turn closing row/cell tags into split markers before the generic tag
    // stripper below can remove them. Case-insensitive so that </TR> and </TD>
    // are recognised just like their opening counterparts.
    $table = preg_replace(['~</tr\s*>~i', '~</td\s*>~i'], ['{tr}', '{td}'], $table);

    // Strip every remaining tag (comments, <th>, inline markup, </table>, ...).
    $table = preg_replace("'<[/!]*?[^<>]*?>'si", '', $table);

    // Remove line breaks plus the whitespace (indentation) that follows them.
    // The escapes are essential: without the backslashes this pattern would
    // delete the letters "r"/"n" followed by "s" from the cell text.
    $table = preg_replace('~[\r\n]\s+~', '', $table);

    // Remove all remaining ASCII spaces.
    $table = str_replace(' ', '', $table);

    // The last chunk is whatever follows the final </tr>; it is not a row.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    // Initialised up front so a table without rows yields [] instead of null.
    $td_array = [];
    foreach ($rows as $row) {
        // The last chunk is whatever follows the final </td>; it is not a cell.
        $cells = explode('{td}', $row);
        array_pop($cells);
        $td_array[] = $cells;
    }

    return $td_array;
}
